Data

x <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839)  # dose levels x_j
n1 <- c(6,13,18,28,52,53,61,60)                                         # successes n_j^1 per dose
n0 <- c(59,60,62,56,63,59,62,60) - n1                                   # failures n_j^0 = n_j - n_j^1

Theoretical Part

Logit Model

\[\log\left(\frac{p}{1-p}\right)=\beta_1+\beta_2x \Leftrightarrow P(y=1\mid x) = \frac{e^{\beta_1 + \beta_2x}}{1+e^{\beta_1 + \beta_2x}}\]

Likelihood function

Likelihood for logit model

\[l((x_j,n_j)_{j=1}^k; \beta)=\prod_{j=1}^k\prod_{i=1}^{n_j}\pi_j^{y_{ij}}(1-\pi_j)^{1-y_{ij}}\]

\[l((x_j,n_j)_{j=1}^k; \beta)=\prod_{j=1}^k\prod_{i=1}^{n_j}\left(\frac{e^{\beta_1+\beta_2x_j}}{1 + e^{\beta_1+\beta_2x_j}}\right)^{y_{ij}}\left(\frac{1}{1 + e^{\beta_1+\beta_2x_j}}\right)^{1-y_{ij}}\]

\[l((x_j,n_j)_{j=1}^k; \beta)=\prod_{j=1}^k\left(\frac{e^{\beta_1+\beta_2x_j}}{1 + e^{\beta_1+\beta_2x_j}}\right)^{n_j^1}\left(\frac{1}{1 + e^{\beta_1+\beta_2x_j}}\right)^{n_j^0}\]

Log-Likelihood

\[L((x_j,n_j)_{j=1}^k; \beta) = \sum_{j=1}^k\left[n_j^1\log\left(\frac{e^{\beta_1+\beta_2x_j}}{1 + e^{\beta_1+\beta_2x_j}}\right) + n_j^0\log\left(\frac{1}{1 + e^{\beta_1+\beta_2x_j}}\right)\right]\]

Using \(\log\frac{e^{\eta}}{1+e^{\eta}} = -\log(1+e^{-\eta})\) and \(\log\frac{1}{1+e^{\eta}} = -\eta-\log(1+e^{-\eta})\) with \(\eta = \beta_1+\beta_2x_j\):

\[L((x_j,n_j)_{j=1}^k; \beta) = \sum_{j=1}^k\left[-n_j^1\log(1 + e^{-(\beta_1+\beta_2x_j)}) - n_j^0(\beta_1+\beta_2x_j)-n^0_j\log(1 + e^{-(\beta_1+\beta_2x_j)})\right]\]

Score and Fisher Information

Score Vector U

\[U(\beta) = \left(\sum_{j=1}^k\left(n^1_j(1-\pi_j) -n^0_j\pi_j\right),\ \sum_{j=1}^k x_j\left(n^1_j(1-\pi_j) -n^0_j\pi_j\right)\right)\]

Fisher Information

Differentiating the score once more (for the logit link the observed and expected information coincide) and writing \(n_j = n_j^1+n_j^0\), \(w_j = n_j\pi_j(1-\pi_j)\):

\[I_{11}=-\frac{\partial^2 L}{\partial \beta_1^2} = \sum_{j=1}^k w_j \qquad I_{22}=-\frac{\partial^2 L}{\partial \beta_2^2} = \sum_{j=1}^k x_j^2 w_j\]

\[I_{12}=I_{21}=-\frac{\partial^2 L}{\partial \beta_1 \partial \beta_2} = \sum_{j=1}^k x_j w_j\]

\[I(\beta)^{-1} = \frac{\begin{bmatrix}\sum_{j=1}^k x_j^2 w_j & -\sum_{j=1}^k x_j w_j\\ -\sum_{j=1}^k x_j w_j & \sum_{j=1}^k w_j \end{bmatrix}}{\left(\sum_{j=1}^k x_j^2 w_j\right)\left(\sum_{j=1}^k w_j\right) - \left(\sum_{j=1}^k x_j w_j\right)^2}\]

Newton-Raphson Algorithm

Recurrence relation for Beta estimation

\[ \beta_{n+1} = \beta_n + (I(\beta_n))^{-1}U(\beta_n), n \ge 1\]

Initial values

# option 1: coefficients from glm
# option 2: linear fit of the empirical log-odds on x with lm (sketched below)
# option 3: small values near zero
# option 4: visual inspection of the log-likelihood surface
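
A minimal sketch of option 2, assuming a continuity correction so the empirical log-odds stay finite when a group has 0% or 100% successes:

emp_logit <- log((n1 + 0.5) / (n0 + 0.5))  # continuity-corrected empirical log-odds
fit0 <- lm(emp_logit ~ x)                  # straight-line fit on the logit scale
beta_init <- unname(coef(fit0))            # starting values (beta_1, beta_2)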

Goodness-of-Fit Tests

Deviance D

\[D = 2[L(\beta_{max};y) - L(\beta;y)] \]
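
Here \(\beta_{max}\) denotes the saturated model, which fits one probability \(y_j/n_j\) per dose group. For grouped binomial data the deviance then expands, with \(n_j = n_j^1+n_j^0\), to

\[D = 2\sum_{j=1}^k\left[y_j\log\left(\frac{y_j}{n_j\hat{\pi}_j}\right) + (n_j-y_j)\log\left(\frac{n_j-y_j}{n_j(1-\hat{\pi}_j)}\right)\right]\]

which is also the sum of the squared deviance residuals defined below.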

Pearson Statistics

General Formula:

\[S_w = \sum_{j=1}^k\frac{(y_j - E(y_j))^2}{Var(y_j)}\]

\[ X^2 = \sum_{j=1}^k \frac{(y_j - (n_j^1+n_j^0)\hat{\pi}_j)^2}{(n_j^1+n_j^0)\hat{\pi}_j(1-\hat{\pi}_j)} \sim \chi^2_{(k-2)}\]

We obtain:

\[X^2 = \sum_{j=1}^k \frac{\left(y_j - (n_j^1+n_j^0) \frac{e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}{1 + e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}\right)^2}{(n_j^1+n_j^0) \left(\frac{e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}{1 + e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}\right)\left(\frac{1}{1 + e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}\right)} \]

Deviance Residuals

\[d_j = \mathrm{sign}\left(y_j - (n_j^1+n_j^0)\hat{\pi}_j\right)\sqrt{2\left[y_j\log\left(\frac{y_j}{(n_j^1+n_j^0)\hat{\pi}_j}\right)+((n_j^1+n_j^0)-y_j)\log\left(\frac{(n_j^1+n_j^0)-y_j}{(n_j^1+n_j^0)(1 - \hat{\pi}_j)}\right)\right]}\]

Pearson Residuals

\[r_j = \frac{y_j - (n_j^1+n_j^0)\hat{\pi}_j}{\sqrt{\widehat{\mathrm{Var}}(y_j)}}\]

\[ r_j = \frac{y_j - (n_j^1+n_j^0) \left(\frac{e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}{1 + e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}\right)}{\sqrt{(n_j^1+n_j^0) \left(\frac{e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}{1 + e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}\right)\left(\frac{1}{1 + e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}\right)}} \]

Probit Model

\[p=\Phi(\beta_1+\beta_2x)\]

Likelihood function

Likelihood for probit model

\[l((x_j,n_j)_{j=1}^k; \beta)=\prod_{j=1}^k\Phi(\beta_1+\beta_2x_j)^{n^1_j}\left(1 - \Phi(\beta_1+\beta_2x_j)\right)^{n^0_j}\]

Log-Likelihood

\[L(X,\beta_1, \beta_2) = \sum_{j=1}^k \left(n^1_j\log(\Phi(\beta_1+\beta_2x_j)) + n^0_j\log(1- \Phi(\beta_1+\beta_2x_j))\right)\]

\[L(X,\beta_1, \beta_2) = \sum_{j=1}^k \left(n^1_j\log\left(\int_{-\infty}^{\beta_1 + \beta_2x_j}\frac{e^{-\frac{t^2}{2}}}{\sqrt{2\pi}}dt\right) + n^0_j\log\left(1- \int_{-\infty}^{\beta_1 + \beta_2x_j}\frac{e^{-\frac{t^2}{2}}}{\sqrt{2\pi}}dt\right)\right)\]

Score and Fisher Information

Score Vector U

\[U(\beta) = (\sum_{j=1}^n \frac{e^{-\frac{1}{2}}(n^1_j -n^0_j)}{\sqrt{2\pi}\Phi(\beta_1+\beta_2x_j)}, \sum_{j=1}^n\frac{e^{-\frac{-x_j^2}{2}}(n^1_j -n^0_j)}{\sqrt{2\pi}\Phi(\beta_1 + \beta_2 x_j)})\]

Fisher Information

Taking expectations (\(E[n^1_j] = n_j\Phi(\eta_j)\), with \(n_j = n^1_j+n^0_j\)) gives the expected Fisher information. With weights \(w_j = \frac{n_j\,\phi(\eta_j)^2}{\Phi(\eta_j)(1-\Phi(\eta_j))}\):

\[I(\beta) = \begin{bmatrix} \sum_{j=1}^k w_j & \sum_{j=1}^k w_jx_j\\ \sum_{j=1}^k w_jx_j & \sum_{j=1}^k w_jx_j^2 \end{bmatrix}\]

\[I(\beta)^{-1} = \frac{\begin{bmatrix} \sum_{j=1}^k w_jx_j^2 & -\sum_{j=1}^k w_jx_j\\ -\sum_{j=1}^k w_jx_j & \sum_{j=1}^k w_j \end{bmatrix}}{\left(\sum_{j=1}^k w_j\right)\left(\sum_{j=1}^k w_jx_j^2\right) - \left(\sum_{j=1}^k w_jx_j\right)^2}\]

Newton-Raphson Algorithm

Recurrence relation for Beta estimation

\[ \beta_{n+1} = \beta_n + (I(\beta_n))^{-1}U(\beta_n), n \ge 1\]

Initial values

# option 1: coefficients from glm (probit link)
# option 2: linear fit of qnorm(empirical proportions) on x with lm
# option 3: small values near zero
# option 4: visual inspection of the log-likelihood surface

Goodness-of-Fit Tests

Deviance D

\[D = 2[L(\beta_{max};y) - L(\beta;y)] \]

Pearson Statistics

General Formula:

\[S_w = \sum_{j=1}^k\frac{(y_j - E(y_j))^2}{Var(y_j)}\]

\[ X^2 = \sum_{j=1}^k \frac{(y_j - (n_j^1+n_j^0)\hat{\pi}_j)^2}{(n_j^1+n_j^0)\hat{\pi}_j(1-\hat{\pi}_j)} \sim \chi^2_{(k-2)}\]

We obtain:

\[X^2 = \sum_{j=1}^k \frac{\left(y_j - (n_j^1+n_j^0)\,\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j)\right)^2}{(n_j^1+n_j^0)\,\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j)\left(1-\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j)\right)} \]

Deviance Residuals

\[d_j = \mathrm{sign}\left(y_j - (n_j^1+n_j^0)\hat{\pi}_j\right)\sqrt{2\left[y_j\log\left(\frac{y_j}{(n_j^1+n_j^0)\hat{\pi}_j}\right)+((n_j^1+n_j^0)-y_j)\log\left(\frac{(n_j^1+n_j^0)-y_j}{(n_j^1+n_j^0)(1 - \hat{\pi}_j)}\right)\right]}\]

Pearson Residuals

\[r_j = \frac{y_j - (n_j^1+n_j^0)\hat{\pi}_j}{\sqrt{\widehat{\mathrm{Var}}(y_j)}}\]

\[ r_j = \frac{y_j - (n_j^1+n_j^0)\,\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j)}{\sqrt{(n_j^1+n_j^0)\,\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j)\left(1-\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j)\right)}} \]

Cloglog Model

\[\log(-\log(1-p))=\beta_1+\beta_2x \Leftrightarrow P(y=1\mid x) = 1 - e^{-e^{\beta_1 + \beta_2x}}\]

Likelihood function

Likelihood for cloglog model

\[l((x_j,n_j)_{j=1}^k; \beta)= \prod_{j=1}^k (1 - e^{-e^{\beta_1 + \beta_2 x_j}})^{n^1_j}(e^{-e^{\beta_1 + \beta_2 x_j}})^{n^0_j}\]

Log-Likelihood

\[L(X,\beta_1, \beta_2) = \log\left(\prod_{j=1}^k (1 - e^{-e^{\beta_1 + \beta_2 x_j}})^{n^1_j}(e^{-e^{\beta_1 + \beta_2 x_j}})^{n^0_j}\right) = \sum_{j=1}^k \left(n^1_j \log(\pi_j) + n^0_j\log(1-\pi_j)\right), \quad \pi_j = 1 - e^{-e^{\beta_1 + \beta_2 x_j}}\]

Score and Fisher Information

Score Vector U

With \(\mu_j(\beta) = e^{\beta_1 + \beta_2 x_j}\):

\[U(\beta) = \left(\sum_{j=1}^k\left(\frac{n^1_j\,\mu_j(\beta)e^{-\mu_j(\beta)}}{1 - e^{-\mu_j(\beta)}} - n^0_j\,\mu_j(\beta)\right),\ \sum_{j=1}^k x_j\left(\frac{n^1_j\,\mu_j(\beta)e^{-\mu_j(\beta)}}{1 - e^{-\mu_j(\beta)}} - n^0_j\,\mu_j(\beta)\right)\right)\]

Fisher Information

Taking expectations (\(E[n^1_j] = n_j\pi_j\), with \(n_j = n^1_j+n^0_j\)), the expected Fisher information has weights \(w_j = \frac{n_j\,\mu_j(\beta)^2 e^{-\mu_j(\beta)}}{1 - e^{-\mu_j(\beta)}}\):

\[I(\beta) = \begin{bmatrix} \sum_{j=1}^k w_j & \sum_{j=1}^k w_jx_j\\ \sum_{j=1}^k w_jx_j & \sum_{j=1}^k w_jx_j^2 \end{bmatrix}\]

\[I(\beta)^{-1} = \frac{\begin{bmatrix} \sum_{j=1}^k w_jx_j^2 & -\sum_{j=1}^k w_jx_j\\ -\sum_{j=1}^k w_jx_j & \sum_{j=1}^k w_j \end{bmatrix}}{\left(\sum_{j=1}^k w_j\right)\left(\sum_{j=1}^k w_jx_j^2\right) - \left(\sum_{j=1}^k w_jx_j\right)^2}\]

Newton-Raphson Algorithm

Recurrence relation for Beta estimation

\[ \beta_{n+1} = \beta_n + (I(\beta_n))^{-1}U(\beta_n), n \ge 1\]

Initial values

# option 1: coefficients from glm (cloglog link)
# option 2: linear fit of log(-log(1 - empirical proportions)) on x with lm
# option 3: small values near zero
# option 4: visual inspection of the log-likelihood surface

Goodness-of-Fit Tests

Deviance D

\[D = 2[L(\beta_{max};y) - L(\beta;y)] \]

Pearson Statistics

General Formula:

\[S_w = \sum_{j=1}^k\frac{(y_j - E(y_j))^2}{Var(y_j)}\]

\[ X^2 = \sum_{j=1}^k \frac{(y_j - (n_j^1+n_j^0)\hat{\pi}_j)^2}{(n_j^1+n_j^0)\hat{\pi}_j(1-\hat{\pi}_j)} \sim \chi^2_{(k-2)}\]

We obtain:

\[X^2 = \sum_{j=1}^k \frac{\left(y_j - (n_j^1+n_j^0)\left(1-e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}\right)\right)^2}{(n_j^1+n_j^0)\left(1-e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}\right)e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}} \]

Deviance Residuals

\[d_j = \mathrm{sign}\left(y_j - (n_j^1+n_j^0)\hat{\pi}_j\right)\sqrt{2\left[y_j\log\left(\frac{y_j}{(n_j^1+n_j^0)\hat{\pi}_j}\right)+((n_j^1+n_j^0)-y_j)\log\left(\frac{(n_j^1+n_j^0)-y_j}{(n_j^1+n_j^0)(1 - \hat{\pi}_j)}\right)\right]}\]

Pearson Residuals

\[r_j = \frac{y_j - (n_j^1+n_j^0)\hat{\pi}_j}{\sqrt{\widehat{\mathrm{Var}}(y_j)}}\]

\[ r_j = \frac{y_j - (n_j^1+n_j^0)\left(1-e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}\right)}{\sqrt{(n_j^1+n_j^0)\left(1-e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}\right)e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}}} \]

Practical Part

Logit Model

Data visualization

Plot

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
x <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839)
n1 <- c(6,13,18,28,52,53,61,60)
n0 <- c(59,60,62,56,63,59,62,60) - n1
p <- n1/(n0+n1)
df <- data.frame(x=x, p=p)
df %>% ggplot(aes(x=x, y=p)) + 
  geom_line() +
  geom_point(color="red") +
  labs(
    title= "Observed proportion as a function of dose x",
    x="Dose x",
    y="Observed proportion p"
  )

mean_val <- mean(x)
sd_val <- sd(x)
lower_limit <- mean_val - 2*sd_val
upper_limit <- mean_val + 2*sd_val
df <- data.frame(
  index = 1:length(x),
  value = x
)

ggplot(df, aes(x=index, y=value)) +
  geom_point(size=3) +
  geom_hline(yintercept=mean(df$value), color="blue", linewidth=1.2) +
  geom_hline(yintercept=upper_limit, color="red", linetype="dashed") +
  geom_hline(yintercept=lower_limit, color="red", linetype="dashed") +
  theme_minimal() +
  labs(title="All doses lie within mean ± 2 SD: no under/over-representation that would justify weighting the log-likelihood before optimization",
       x="Sample Index",
       y="Dose x")

Comments

The relationship between dose and observed proportion is increasing, at a rate that varies with x: it is clearly nonlinear.

Parameter Estimation

Newton-Raphson Algorithm Implementation up to Convergence

library(ggplot2)
library(tidyr)
library(dplyr)

##data

x <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839)
x_scaled <- (x-mean(x))/sd(x)  # standardized dose (not used below)
n1 <- c(6 ,13,18,28,52,53,61,60)
n0 <- c(59,60,62,56,63,59,62,60) - n1

## general

# numerically stable logistic: the branch avoids overflowing exp() on either tail
logistic <- function(z) {
  return(ifelse(z >= 0, 
                1 / (1 + exp(-z)), 
                exp(z) / (1 + exp(z))))
}

# fitted success probability pi_j(beta) at each dose
prob <- function(beta) {
  z <- beta[1] + beta[2]*x
  return(logistic(z))
}


# grouped binomial log-likelihood; the global eps guards against log(0)
log_lik <- function(beta) {
  p <- prob(beta)
  return(sum(n1*log(p+eps)+ n0*log(1-p+eps)))
}

## Score

# score vector (gradient of the log-likelihood)
U <- function(beta) {
  U <- matrix(nrow=2, ncol=1)
  p <- prob(beta)
  U[1] <- sum(n1*(1-p)-n0*p)
  U[2] <- sum(x*(n1*(1-p)-n0*p))
  return(U)
}


# score computed on a random subsample of dose groups
stoch_U <- function(beta, index) {
  G <- matrix(nrow=2, ncol=1)
  p <- prob(beta)
  w1 <- n1*(1-p)-n0*p
  w2 <- x*(n1*(1-p)-n0*p)
  G[1] <- sum(w1[index])
  G[2] <- sum(w2[index])
  return(G)
}

##Fisher Information

# inverse Fisher information on a subsample (returns I^{-1}, not I)
stoch_I <- function(beta, index) {
  com <- matrix(ncol=2, nrow=2)
  p <- prob(beta)
  w <- (n1+n0)*p*(1-p)          # Fisher weights n_j * pi_j * (1 - pi_j)
  com[1,1] <- sum((x^2*w)[index])
  com[1,2] <- -sum((x*w)[index])
  com[2,1] <- com[1,2]
  com[2,2] <- sum(w[index])
  det <- com[1,1]*com[2,2] - com[1,2]^2
  if (abs(det) < 1e-6) {
    warning("Determinant too small, unstable inversion")
    det <- 1e6   # huge denominator: effectively freezes the update
  }
  return(com/(det+eps))
}

# inverse Fisher information on the full sample (returns I^{-1}, not I)
I <- function(beta) {
  com <- matrix(ncol=2, nrow=2)
  p <- prob(beta)
  w <- (n1+n0)*p*(1-p)          # Fisher weights n_j * pi_j * (1 - pi_j)
  com[1,1] <- sum(x^2*w)
  com[1,2] <- -sum(x*w)
  com[2,1] <- com[1,2]
  com[2,2] <- sum(w)
  det <- com[1,1]*com[2,2] - com[1,2]^2
  if (abs(det) < 1e-6) {
    warning("Determinant too small, unstable inversion")
    det <- 1e6   # huge denominator: effectively freezes the update
  }
  return(com/(det+eps))
}

## EMV Algo

# one Newton-Raphson step; note that I() already returns the inverse information
EMV_algo <- function(beta, n) {
  new_beta <- beta + (I(beta)%*%U(beta))
  return(new_beta)
}

# damped variant: scale the step and halve it until the log-likelihood improves
EMV_algo_lambda <- function(beta, n) {
  ll_old <- log_lik(beta)
  step <- 1
  if (n > 20000) {
    param <- sqrt(lambda/n)
  } else {
    param <- lambda/sqrt(n)
  }
  for (i in 1:10) {
    beta_check <- beta + param*step*(I(beta)%*%U(beta))
    ll_new <- log_lik(beta_check)
    if (all(is.finite(beta_check)) && ll_new >= ll_old) {
      break
    }
    step <- step / 2
  }
  beta_star <- beta_check
  return(beta_star)
}

# stochastic variant: same damping, but score and information on a random subsample
EMV_algo_stoch <- function(beta, n, index) {
  ll_old <- log_lik(beta)
  step <- 1
  if (n > 20000) {
    param <- sqrt(lambda/n)
  } else {
    param <- lambda/sqrt(n)
  }
  for (i in 1:10) {
    beta_check <- beta + param*step*(stoch_I(beta, index)%*%stoch_U(beta, index))
    ll_new <- log_lik(beta_check)
    if (all(is.finite(beta_check)) && ll_new >= ll_old) {
      break
    }
    step <- step / 2
  }
  beta_star <- beta_check
  return(beta_star)
}

##EMV call

resu_EMV_Batch <- function(beta_H1, beta_H2) {
  z <- rep(NA, N)
  z[1] <- log_lik(c(beta_H1[1], beta_H2[1]))
  for (j in 2:N) {
  resu <- EMV_algo(c(beta_H1[j-1],beta_H2[j-1]),j)
  beta_H1[j] <- resu[1]
  beta_H2[j] <- resu[2]
  param <- c(resu[1], resu[2])
  z[j] <- log_lik(param)
  
  if (sqrt(sum((resu - c(beta_H1[j-1],beta_H2[j-1]))^2))< nu) {
    message("Convergence atteinte à l'itération ", j)
    break
  }
  }
  return(c(beta_H1, beta_H2, z))
}

resu_EMV_SG <- function(beta_H1S, beta_H2S) {
  for (j in 2:N) {
  index <- sample(length(x), floor(3*length(x)/4))
  resuS <- EMV_algo_stoch(c(beta_H1S[j-1],beta_H2S[j-1]), j, index)
  beta_H1S[j] <- resuS[1]
  beta_H2S[j] <- resuS[2]
  if (sqrt(sum((resuS - c(beta_H1S[j-1],beta_H2S[j-1]))^2))< nu) {
    message("Convergence atteinte à l'itération ", j)
    break
    }
  }
 return(c(beta_H1S, beta_H2S)) 
}

####EMV calling

##params
eps <- 1e-8
lambda <- 1
nu <- 1e-6
N <- 10000
## initial values
beta_1_init <- -56.5
beta_2_init <- 32

## batch
beta_H1 <- rep(NA, N)
beta_H1[1] <- beta_1_init
beta_H2 <- rep(NA, N)
beta_H2[1] <- beta_2_init

EMV_output <- resu_EMV_Batch(beta_H1, beta_H2)
beta_H1 <- EMV_output[1:N]
beta_H2 <- EMV_output[(N+1):(2*N)]

## "mini batch"
beta_H1S <- rep(NA, N)
beta_H1S[1] <- beta_1_init
beta_H2S <- rep(NA, N)
beta_H2S[1] <- beta_2_init

EMV_output_sg <- resu_EMV_SG(beta_H1S, beta_H2S)
## Convergence reached at iteration 421
beta_H1S <- EMV_output_sg[1:N]
beta_H2S <- EMV_output_sg[(N+1):(2*N)]

###Plot

df <- data.frame(
  iter = 1:N,
  beta_H1 = beta_H1,
  beta_H2 = beta_H2,
  beta_H1S = beta_H1S,
  beta_H2S = beta_H2S
)

df_long <- df %>%
  pivot_longer(cols = -iter, names_to = c("param", "method"), 
               names_pattern = "(beta_H1|beta_H2)(S?)",
               values_to = "value") %>%
  mutate(
    param = recode(param, beta_H1 = "Beta 1", beta_H2 = "Beta 2"),
    method = ifelse(method == "S", "Stochastic", "Batch")
  )

ggplot(df_long, aes(x = iter, y = value, color = method)) +
  geom_line() +
  facet_wrap(~param, scales = "free_y") +
  theme_minimal() +
  labs(
    title = "Convergence of Parameters Over Iterations",
    x = "Iteration",
    y = "Parameter Value",
    color = "Algorithm"
  )
## Warning: Removed 9579 rows containing missing values or values outside the scale range
## (`geom_line()`).

b0_seq <- seq(-1000, 500, length.out =1500)
b1_seq <- seq(-500, 1000, length.out = 1500)
ll_grid <- expand.grid(beta0 = b0_seq, beta1 = b1_seq)

ll_grid$loglik <- apply(ll_grid, 1, function(row) log_lik(c(row[1], row[2])))

library(ggplot2)
ggplot(ll_grid, aes(x = beta0, y = beta1, fill = loglik)) +
  geom_tile() +
  scale_fill_viridis_c() +
  labs(title = "Log-vraisemblance sur la grille des paramètres")

b0_seq <- seq(-5, 5, length.out =10)
b1_seq <- seq(-5, 5, length.out = 10)
ll_grid <- expand.grid(beta0 = b0_seq, beta1 = b1_seq)

ll_grid$loglik <- apply(ll_grid, 1, function(row) log_lik(c(row[1], row[2])))

library(ggplot2)
ggplot(ll_grid, aes(x = beta0, y = beta1, fill = loglik)) +
  geom_tile() +
  scale_fill_viridis_c() +
  labs(title = "Log-vraisemblance sur la grille des paramètres")

start_vec <- c(-50, 28, -1/2, 0, 0, 0, -60, 34)  # four starting points (beta_1, beta_2)
H_11 <- rep(NA, N)
H_12 <- rep(NA, N)
H_21 <- rep(NA, N)
H_22 <- rep(NA, N)
H_31 <- rep(NA, N)
H_32 <- rep(NA, N)
H_41 <- rep(NA, N)
H_42 <- rep(NA, N)
z1 <- rep(NA, N)
z2 <- rep(NA, N)
z3 <- rep(NA, N)
z4 <- rep(NA, N)
for (j in 1:4) {
  beta_1_init <- start_vec[2*(j-1) +1]
  beta_2_init <- start_vec[2*(j-1) + 2]
  beta_H1 <- rep(NA, N)
  beta_H1[1] <- beta_1_init
  beta_H2 <- rep(NA, N)
  beta_H2[1] <- beta_2_init
  EMV_output <- resu_EMV_Batch(beta_H1, beta_H2)
  beta_H1 <- EMV_output[1:N]
  beta_H2 <- EMV_output[(N+1):(2*N)]
  z <- EMV_output[(2*N+1):(3*N)]
  if (j == 1) {
      H_11 <- beta_H1
      H_12 <- beta_H2
      z1 <- z
  } else if (j == 2) {
    H_21 <- beta_H1
    H_22 <- beta_H2
    z2 <- z
  } else if (j == 3) {
    H_31 <- beta_H1
    H_32 <- beta_H2
    z3 <- z
  } else {
    H_41 <- beta_H1
    H_42 <- beta_H2
    z4 <- z
  }
}
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)

# Combine all trajectories into one data frame
traj_df <- bind_rows(
  data.frame(beta0 = H_11, beta1 = H_12, loglik = z1, traj = "Start 1", iter = 1:N),
  data.frame(beta0 = H_21, beta1 = H_22, loglik = z2, traj = "Start 2", iter = 1:N),
  data.frame(beta0 = H_31, beta1 = H_32, loglik = z3, traj = "Start 3", iter = 1:N),
  data.frame(beta0 = H_41, beta1 = H_42, loglik = z4, traj = "Start 4", iter = 1:N)
)

# Function to get starting, ending, and max-likelihood points
get_special_points <- function(df) {
  df <- df %>% filter(!is.na(beta0))
  start <- df %>% slice(1) %>% mutate(type = "Start")
  end   <- df %>% slice_tail(n=1) %>% mutate(type = "End")
  max_ll <- df %>% slice_max(loglik, n=1) %>% mutate(type = "Max LL")
  bind_rows(start, end, max_ll)
}

special_pts <- traj_df %>%
  group_by(traj) %>%
  group_modify(~get_special_points(.x)) %>%
  ungroup()

# Plot
fig <- plot_ly() %>%
  # Trajectories
  add_trace(data = traj_df, x = ~beta0, y = ~beta1, z = ~loglik, color = ~traj,
            type = 'scatter3d', mode = 'lines', name = ~traj) %>%
  # Starting points
  add_markers(data = special_pts %>% filter(type=="Start"),
              x = ~beta0, y = ~beta1, z = ~loglik,
              marker = list(size = 4, color = 'green'), name = 'Start') %>%
  # Ending points
  add_markers(data = special_pts %>% filter(type=="End"),
              x = ~beta0, y = ~beta1, z = ~loglik,
              marker = list(size = 4, color = 'red'), name = 'End') %>%
  # Max log-likelihood points
  add_markers(data = special_pts %>% filter(type=="Max LL"),
              x = ~beta0, y = ~beta1, z = ~loglik,
              marker = list(size = 4, color = 'blue'), name = 'Max LL') %>%
  layout(scene = list(
    xaxis = list(title = "Beta 0"),
    yaxis = list(title = "Beta 1"),
    zaxis = list(title = "Log-Likelihood")
  ),
  title = "Trajectories of Beta Parameters with Starting, Ending, and Max Log-Likelihood Points"
  )
# Starting points with coordinates as labels

fig
library(ggplot2)
#install.packages("viridis")
library(viridis)
## Loading required package: viridisLite
# Example: GLM coefficients for reference
glm_coef <- coef(glm(cbind(n1, n0) ~ x, family = binomial()))

b0_seq <- seq(-500, 500, length.out =1000)
b1_seq <- seq(-500, 500, length.out = 1000)
ll_grid <- expand.grid(beta0 = b0_seq, beta1 = b1_seq)

ll_grid$loglik <- apply(ll_grid, 1, function(row) log_lik(c(row[1], row[2])))

ggplot(ll_grid, aes(x = beta0, y = beta1, fill = loglik)) +
  geom_tile() +
  scale_fill_viridis_c() +
  labs(title = "Log-vraisemblance sur la grille des paramètres")

ggplot(ll_grid, aes(x = beta0, y = beta1, fill = loglik)) +
  geom_tile() +
  scale_fill_viridis_c() +
  geom_point(aes(x = glm_coef[1], y = glm_coef[2]), color = "red", size = 3, shape = 17) +
  labs(title = "Log-vraisemblance sur la grille des paramètres",
       subtitle = "Le point rouge correspond à la solution GLM") +
  theme_minimal()

plot(x, n1 / (n1+n0), pch=19, xlab="x", ylab="Proportion of successes")

library(ggplot2)

# Given points
p1 <- c(beta0 = -50, beta1 = 28)
p2 <- c(beta0 = -60, beta1 = 34)

# Number of points along the line
n_pts <- 200

# Generate the line by linear interpolation
beta0_seq <- seq(p1["beta0"], p2["beta0"], length.out = n_pts)
beta1_seq <- seq(p1["beta1"], p2["beta1"], length.out = n_pts)

# Compute the log-likelihood along the line
loglik_seq <- sapply(1:n_pts, function(i) log_lik(c(beta0_seq[i], beta1_seq[i])))

# Put into a data frame
df_line <- data.frame(beta0 = beta0_seq, beta1 = beta1_seq, loglik = loglik_seq)

# Plot
ggplot(df_line, aes(x = beta0, y = beta1, fill = loglik)) +
  geom_tile() +
  scale_fill_viridis_c() +
  geom_point(aes(x = p1["beta0"], y = p1["beta1"]), color = "green", size = 3) +  # point 1
  geom_point(aes(x = p2["beta0"], y = p2["beta1"]), color = "red", size = 3) +    # point 2
  labs(title = "Log-likelihood along the line between the two starting points",
       x = expression(beta[0]), y = expression(beta[1]), fill = "Log-likelihood") +
  theme_minimal()

MLE Estimators

c(-56.5,32)  # Newton-Raphson estimates (beta_1, beta_2)
## [1] -56.5  32.0

Predictions

For independent observations \((y_j, x_j)\), the logit model gives

\[\pi_j := P(Y_{ij} = 1 \mid x_j) = \frac{e^{\hat{\beta}_1 + \hat{\beta}_2 x_j}}{1+e^{\hat{\beta}_1 + \hat{\beta}_2 x_j}}\]

and the linear predictor

\[\hat{v}_{j} = \hat{\beta}_1 + \hat{\beta}_2x_j\]

Computing linear predicted values

lv_hat <- -56.5 + 32*x
lv_hat
## [1] -2.3976 -1.3256 -0.3336  0.5944  1.4616  2.2808  3.0520  3.7848
ggplot(data.frame(x=x, y=lv_hat), aes(x=x, y=y)) + geom_line()

Computing predicted proportions

p_hat <- (n1+n0)*prob(c(-56.5,32))  # fitted counts n_j * pi_hat_j; drop the (n1+n0) factor for plain proportions
p_hat
## [1]  4.917998 12.593286 25.876627 36.084948 51.141968 53.529188 59.201865
## [8] 58.667462

Computing quantities Yj for each Xj

Y <- (n1+n0)*prob(c(-56.5,32))  # fitted counts at the Newton-Raphson estimates

df_plot <- data.frame(
  x = x,
  value = c(Y, n1, (n1+n0)*prob(c(-60,34))),  # GLM_R: rounded glm() coefficients
  type  = rep(c("Predicted", "Observed", "GLM_R"), each = length(x))
)

ggplot(df_plot, aes(x = x, y = value, color = type)) +
  geom_line() +
  geom_point() +
  scale_color_manual(values = c("Predicted" = "red", "Observed" = "green", "GLM_R" = "blue")) +
  labs(title = "Comparison between observations, predictions and glm_R predictions",
       x = "x", y = "Y")

Result Visualisation

Plotting observed proportions vs predicted proportions depending on dose X

df_plot <- data.frame(
  x = x,
  value = c(prob(c(-56.5,32)), n1/(n1+n0), prob(c(-60,34))),
  type  = rep(c("Predicted", "Observed", "GLM_R"), each = length(x))
)

ggplot(df_plot, aes(x = x, y = value, color = type)) +
  geom_line() +
  geom_point() +
  scale_color_manual(values = c("Predicted" = "red", "Observed" = "green", "GLM_R" = "blue")) +
  labs(title = "Comparison between observations, predictions and glm_R predictions",
       x = "x", y = "Proportion")

Commenting on fit quality

The fit is good in the tails (lowest and highest doses) but poorer at intermediate doses.

Diagnostics

Computing Deviance

glm(cbind(n1, n0) ~ x, family = binomial(link="logit"))
## 
## Call:  glm(formula = cbind(n1, n0) ~ x, family = binomial(link = "logit"))
## 
## Coefficients:
## (Intercept)            x  
##      -60.72        34.27  
## 
## Degrees of Freedom: 7 Total (i.e. Null);  6 Residual
## Null Deviance:       284.2 
## Residual Deviance: 11.23     AIC: 41.43
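
As a cross-check, a minimal sketch recomputing the residual deviance from the Newton-Raphson estimates, assuming the prob() helper and the data defined above (one group has n0 = 0, so the 0*log(0) term is guarded):

pi_hat <- prob(c(-56.5, 32))   # fitted probabilities at the hand-computed MLE
n <- n1 + n0
dev_term <- function(obs, expd) ifelse(obs == 0, 0, obs * log(obs / expd))
D <- 2 * sum(dev_term(n1, n * pi_hat) + dev_term(n0, n * (1 - pi_hat)))
D   # comparable to the residual deviance reported by glm()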

Computing Pearson Statistics
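
A minimal sketch under the same assumptions (fitted probabilities taken at the Newton-Raphson estimates):

pi_hat <- prob(c(-56.5, 32))
n <- n1 + n0
X2 <- sum((n1 - n * pi_hat)^2 / (n * pi_hat * (1 - pi_hat)))
X2
qchisq(0.95, df = length(x) - 2)   # reference quantile for k - 2 degrees of freedom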

Probit Model

Data visualization

Plot

Comments

Parameter Estimation

Newton-Raphson Algorithm Implementation up to Convergence
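
The logit implementation carries over: replace logistic() by pnorm() in prob() and plug in the probit score and information derived above. As a reference point for the estimates, a sketch using glm (the IRLS solution, not the hand-rolled algorithm itself):

fit_probit <- glm(cbind(n1, n0) ~ x, family = binomial(link = "probit"))
coef(fit_probit)
c(deviance = deviance(fit_probit), pearson = sum(residuals(fit_probit, type = "pearson")^2))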

MLE Estimators

Predictions

Computing linear predicted values

Computing predicted proportions

Computing quantities Yj for each Xj

Result Visualisation

Plotting observed proportions vs predicted proportions depending on dose X

Commenting on adjustment quality

Diagnostics

Computing Deviance

Computing Pearson Statistics

Cloglog Model

Data visualization

Plot

Comments

Parameter Estimation

Newton-Raphson Algorithm Implementation up to Convergence
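
As for the probit model, the logit code carries over once prob() returns 1 - exp(-exp(z)) and the cloglog score and information above are plugged in. A reference sketch using glm:

fit_cll <- glm(cbind(n1, n0) ~ x, family = binomial(link = "cloglog"))
coef(fit_cll)
c(deviance = deviance(fit_cll), pearson = sum(residuals(fit_cll, type = "pearson")^2))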

MLE Estimators

# the iterate vectors keep NA entries after early stopping, so report the last non-NA value of each run
last_valid <- function(v) tail(v[!is.na(v)], 1)
c(last_valid(beta_H1), last_valid(beta_H2), last_valid(beta_H1S), last_valid(beta_H2S))

Predictions

Computing linear predicted values

Computing predicted proportions

Computing quantities Yj for each Xj

Result Visualisation

Plotting observed proportions vs predicted proportions depending on dose X

Commenting on adjustment quality

Diagnostics

Computing Deviance

Computing Pearson Statistics

Model Comparison

Synthesis of results

Table of Deviance and Pearson Statistics for the Logit, Probit and Cloglog Models
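
A minimal sketch of such a table, assuming the three glm reference fits:

links <- c("logit", "probit", "cloglog")
fits <- lapply(links, function(l) glm(cbind(n1, n0) ~ x, family = binomial(link = l)))
data.frame(
  link = links,
  deviance = sapply(fits, deviance),
  pearson = sapply(fits, function(f) sum(residuals(f, type = "pearson")^2)),
  AIC = sapply(fits, AIC)
)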

Estimator Comparison

Model Selection

Selection criterion based on deviance

Pros and Cons of each model

Conclusion

Which model is most adapted to the data?

Discussion on Model Selection